% Tarazu paper, journal (SIGARCH Computer Architecture News) record.
% Entry b25 describes the same paper as a proceedings contribution.
@article{b1,
  author     = {Ahmad, Faraz and Chakradhar, Srimat T. and Raghunathan, Anand and Vijaykumar, T. N.},
  title      = {Tarazu: Optimizing {MapReduce} on Heterogeneous Clusters},
  journal    = {ACM SIGARCH Computer Architecture News},
  issue_date = {March 2012},
  volume     = {40},
  number     = {1},
  pages      = {61--74},
  numpages   = {14},
  month      = mar,
  year       = {2012},
  issn       = {0163-5964},
  publisher  = {ACM},
  address    = {New York, NY, USA}
}
% Zaharia et al., delay scheduling paper (EuroSys '10).
@inproceedings{b2,
  author    = {Zaharia, Matei and Borthakur, Dhruba and Sen Sarma, Joydeep and Elmeleegy, Khaled and Shenker, Scott and Stoica, Ion},
  title     = {Delay Scheduling: A Simple Technique for Achieving Locality and Fairness in Cluster Scheduling},
  booktitle = {Proceedings of the 5th European Conference on Computer Systems},
  series    = {EuroSys '10},
  pages     = {265--278},
  numpages  = {14},
  year      = {2010},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Paris, France},
  isbn      = {978-1-60558-577-2},
  keywords  = {cluster computing, fair sharing, mapreduce, scheduling}
}
% Ananthanarayanan et al., Scarlett (EuroSys '11).
@inproceedings{b3,
  author    = {Ananthanarayanan, Ganesh and Agarwal, Sameer and Kandula, Srikanth and Greenberg, Albert and Stoica, Ion and Harlan, Duke and Harris, Ed},
  title     = {Scarlett: Coping with Skewed Content Popularity in {MapReduce} Clusters},
  booktitle = {Proceedings of the Sixth Conference on Computer Systems},
  series    = {EuroSys '11},
  pages     = {287--300},
  numpages  = {14},
  year      = {2011},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Salzburg, Austria},
  isbn      = {978-1-4503-0634-8},
  keywords  = {data locality, datacenter storage, fairness, replication}
}
% Abad, Lu and Campbell, DARE (IEEE Cluster 2011).
@inproceedings{b4,
  author    = {Abad, C. L. and Lu, Y. and Campbell, R. H.},
  title     = {{DARE}: Adaptive Data Replication for Efficient Cluster Scheduling},
  booktitle = {2011 IEEE International Conference on Cluster Computing},
  pages     = {159--168},
  month     = sep,
  year      = {2011},
  keywords  = {data handling;information retrieval;probability;sampling methods;scheduling;social networking (online);DARE;cluster scheduling;data intensive system;data locality problem;distributed adaptive data replication algorithm;probabilistic sampling;competitive aging algorithm;remote data access;network usage;Facebook;FIFO scheduler;Hadoop;FAIR scheduler;delay scheduling;turnaround time;job slowdown;Bandwidth;Heuristic algorithms;Distributed databases;Clustering algorithms;Cloud computing;Aging;Probabilistic logic;MapReduce;replication;scheduling;locality}
}
% Jalaparti et al., network-aware scheduling (SIGCOMM '15).
@inproceedings{b5,
  author    = {Jalaparti, Virajith and Bodik, Peter and Menache, Ishai and Rao, Sriram and Makarychev, Konstantin and Caesar, Matthew},
  title     = {Network-Aware Scheduling for Data-Parallel Jobs: Plan When You Can},
  booktitle = {Proceedings of the 2015 ACM Conference on Special Interest Group on Data Communication},
  series    = {SIGCOMM '15},
  pages     = {407--420},
  numpages  = {14},
  year      = {2015},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {London, United Kingdom},
  keywords  = {cluster schedulers, cross-layer optimization, data-intensive applications, joint data and compute placement}
}
% Xu et al., heterogeneity-aware Spark task scheduler (IEEE CLUSTER 2018).
@inproceedings{b6,
  author    = {Xu, L. and Butt, A. R. and Lim, S. and Kannan, R.},
  title     = {A Heterogeneity-Aware Task Scheduler for {Spark}},
  booktitle = {2018 IEEE International Conference on Cluster Computing (CLUSTER)},
  pages     = {245--256},
  month     = sep,
  year      = {2018},
  keywords  = {Big Data;graph theory;learning (artificial intelligence);parallel processing;resource allocation;scheduling;diverse applications;graph computation;multidimensional heterogeneity;resource scheduling;hardware capabilities;heterogeneity-aware task scheduling system;task-level resource characteristics;dominant scheduling factor;standard Spark scheduler;scientific computing;dynamic resource;hardware characteristics;Big Data processing systems;machine learning;heterogeneous hardware;RUPAM;Task analysis;Sparks;Dynamic scheduling;Hardware;Big Data;Resource management;Processor scheduling;Spark;Scheduling;Heterogeneity;Resource Management;Big Data}
}
% Pan et al., H-Scheduler (ICPADS 2018).
@inproceedings{b7,
  author    = {Pan, F. and Xiong, J. and Shen, Y. and Wang, T. and Jiang, D.},
  title     = {{H-Scheduler}: Storage-Aware Task Scheduling for Heterogeneous-Storage {Spark} Clusters},
  booktitle = {2018 IEEE 24th International Conference on Parallel and Distributed Systems (ICPADS)},
  pages     = {1--9},
  month     = dec,
  year      = {2018},
  keywords  = {Big Data;computer centres;parallel processing;resource allocation;scheduling;storage management;H-Scheduler;storage-aware task scheduling;heterogeneous-storage Spark clusters;heterogeneous storage devices;heterogeneous-storage-aware feature;hybrid storage clusters;task scheduling strategy;data centers;Big Data workloads;Task analysis;Performance evaluation;Big Data;Sparks;Distributed databases;Schedules;Scheduling;Big Data Processing;Heterogeneous Storage;Task Scheduling}
}
% Wang, Jiang and Yang, ActCap (INFOCOM 2015).
% The key is upper-case B8, unlike the other keys in this file; it is kept so existing citations still resolve.
@inproceedings{B8,
  author    = {Wang, B. and Jiang, J. and Yang, G.},
  title     = {{ActCap}: Accelerating {MapReduce} on heterogeneous clusters with capability-aware data placement},
  booktitle = {2015 IEEE Conference on Computer Communications (INFOCOM)},
  pages     = {1328--1336},
  month     = apr,
  year      = {2015},
  keywords  = {data handling;Markov processes;parallel programming;ActCap;MapReduce acceleration;heterogeneous clusters;inter-node data transfer;data distribution;Markov chain;node-capability-aware data placement;Tarazu;Markov processes;Data transfer;Computational modeling;Benchmark testing;Hardware;Conferences;Computers;MapReduce;Heterogeneous Clusters;Data Placement;Load Balancing;Big Data}
}

% Karp, chapter in an edited volume: the chapter has its own author and title,
% so the entry is typed as incollection with the volume title in booktitle.
% The long symposium description from the publisher export is kept in booksubtitle.
@incollection{b9,
  author       = {Karp, Richard M.},
  editor       = {Miller, Raymond E. and Thatcher, James W. and Bohlinger, Jean D.},
  title        = {Reducibility among Combinatorial Problems},
  booktitle    = {Complexity of Computer Computations},
  booksubtitle = {Proceedings of a symposium on the Complexity of Computer Computations, held March 20--22, 1972, at the IBM Thomas J. Watson Research Center, Yorktown Heights, New York, and sponsored by the Office of Naval Research, Mathematics Program, IBM World Trade Corporation, and the IBM Research Mathematical Sciences Department},
  pages        = {85--103},
  year         = {1972},
  publisher    = {Springer US},
  address      = {Boston, MA},
  abstract     = {A large class of computational problems involve the determination of properties of graphs, digraphs, integers, arrays of integers, finite families of finite sets, boolean formulas and elements of other countable domains. Through simple encodings from such domains into the set of words over a finite alphabet these problems can be converted into language recognition problems, and we can inquire into their computational complexity. It is reasonable to consider such a problem satisfactorily solved when an algorithm for its solution is found which terminates within a number of steps bounded by a polynomial in the length of the input. We show that a large number of classic unsolved problems of covering, matching, packing, routing, assignment and sequencing are equivalent, in the sense that either each of them possesses a polynomial-bounded algorithm or none of them does.}
}


% The original record had no author and no title, and gave the book's title as booktitle.
% NOTE(review): author supplied from the book's published metadata -- confirm, and confirm the edition/year cited.
@book{b10,
  author    = {Hromkovi{\v{c}}, Juraj},
  title     = {Algorithmics for Hard Problems: Introduction to Combinatorial Optimization, Randomization, Approximation, and Heuristics},
  publisher = {Springer},
  year      = {2003}
}
% Wikipedia article on integer programming.
@misc{b11,
  title        = {Integer programming},
  howpublished = {\url{https://en.wikipedia.org/wiki/Integer\_programming}},
  year         = {2018}
}

% The author list was malformed (three comma-separated parts in one name); it is rewritten as two authors.
% NOTE(review): the title and page range do not look like they belong with this author list and
% booktitle -- the record appears to merge two different works. Verify which work is cited and
% correct the remaining fields (and the entry type) accordingly.
@inproceedings{b12,
  author    = {Grimmett, Geoffrey R. and Stirzaker, David},
  title     = {Policy gradient methods for reinforcement learning with function approximation},
  booktitle = {Probability and random processes},
  pages     = {1057--1063},
  year      = {2000}
}
% Wikipedia article on martingales; the stray space inside the URL path is removed.
@misc{b13,
  title        = {Martingale (probability theory)},
  howpublished = {\url{https://en.wikipedia.org/wiki/Martingale\_(probability\_theory)}},
  year         = {2019}
}

% Apache Hadoop project site. The product name is braced so sentence-casing styles keep its capital.
@misc{b14,
  title        = {Apache {Hadoop}},
  howpublished = {\url{https://hadoop.apache.org}},
  year         = {2018}
}
% Apache Spark project site. The product name is braced so sentence-casing styles keep its capital.
@misc{b15,
  title        = {Apache {Spark}},
  howpublished = {\url{https://spark.apache.org}},
  year         = {2018}
}
% Guo, Fox and Zhou, data locality in MapReduce (CCGrid 2012).
@inproceedings{b18,
  author    = {Guo, Zhenhua and Fox, Geoffrey and Zhou, Mo},
  title     = {Investigation of Data Locality in {MapReduce}},
  booktitle = {Proceedings of the 2012 12th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing ({CCGrid} 2012)},
  series    = {CCGRID '12},
  pages     = {419--426},
  numpages  = {8},
  year      = {2012},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  keywords  = {MapReduce, Hadoop, scheduling, data locality}
}
% HDFS project page. The acronym is braced; unbraced, sentence-casing styles print it as "Hdfs".
@misc{b19,
  title        = {{HDFS}},
  howpublished = {\url{https://hadoop.apache.org/hdfs}},
  year         = {2018}
}
% Google cluster trace data set page.
@misc{b20,
  title        = {Google Cluster Trace},
  howpublished = {\url{https://code.google.com/p/googleclusterdata/}},
  year         = {2012}
}

% Tarazu paper, proceedings (ASPLOS XVII) record.
% Entry b1 describes the same paper as a journal article.
@inproceedings{b25,
  author    = {Ahmad, Faraz and Chakradhar, Srimat T. and Raghunathan, Anand and Vijaykumar, T. N.},
  title     = {Tarazu: Optimizing {MapReduce} on Heterogeneous Clusters},
  booktitle = {Proceedings of the Seventeenth International Conference on Architectural Support for Programming Languages and Operating Systems},
  series    = {ASPLOS XVII},
  pages     = {61--74},
  numpages  = {14},
  year      = {2012},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {London, England, UK},
  isbn      = {978-1-4503-0759-8},
  keywords  = {MapReduce, cluster scheduling, heterogeneous clusters, load imbalance, shuffle}
}
% Gandhi, Xie and Hu, PIKACHU (USENIX ATC 13).
@inproceedings{b26,
  author    = {Gandhi, Rohan and Xie, Di and Hu, Y. Charlie},
  title     = {{PIKACHU}: How to Rebalance Load in Optimizing {MapReduce} On Heterogeneous Clusters},
  booktitle = {Presented as part of the 2013 {USENIX} Annual Technical Conference ({USENIX} {ATC} 13)},
  pages     = {61--66},
  year      = {2013},
  publisher = {{USENIX}},
  address   = {San Jose, CA}
}
% Malik et al., Hadoop workload characterization on microservers (IEEE TMSCS 2018).
@article{b27,
  author   = {Malik, M. and Neshatpour, K. and Rafatirad, S. and Homayoun, H.},
  title    = {{Hadoop} Workloads Characterization for Performance and Energy Efficiency Optimizations on Microservers},
  journal  = {IEEE Transactions on Multi-Scale Computing Systems},
  volume   = {4},
  number   = {3},
  pages    = {355--368},
  month    = jul,
  year     = {2018},
  keywords = {Big Data;cloud computing;computer centres;data handling;parallel processing;public domain software;scheduling;energy-efficiency;microservers;energy efficiency optimizations;low-power embedded processors;high-performance server market;data center workloads;server computational power;current high-performance server architectures;physical design constraints;Hadoop MapReduce framework;Hadoop configuration parameters;MapReduce job performance;architecture level parameters tuning;Hadoop-based applications;Hadoop MapReduce applications performance;Big Data analytic applications;Big Data analytic applications;Energy efficiency;Tuning;Servers;Computer architecture;Big Data applications;Optimization;Application characterization;hadoop MapReduce;big data;microservers;energy-efficiency;performance;power and performance tuning parameters}
}
% Gupta, datacenter workload acceleration (FPL 2016).
% Initials are separated so they are parsed as two given-name initials.
@inproceedings{b28,
  author    = {Gupta, P. K.},
  title     = {Accelerating datacenter workloads},
  booktitle = {26th International Conference on Field Programmable Logic and Applications ({FPL})},
  year      = {2016}
}
% Kong, US patent application on a datacenter storage system.
@misc{b29,
  author    = {Kong, Jonathan},
  title     = {Datacenter storage system},
  note      = {US Patent App. 13/694,001},
  publisher = {Google Patents},
  month     = apr # "~24",
  year      = {2014}
}
% Delimitrou et al., storage workload modeling (IISWC 2011).
@inproceedings{b30,
  author       = {Delimitrou, Christina and Sankar, Sriram and Vaid, Kushagra and Kozyrakis, Christos},
  title        = {Decoupling datacenter studies from access to large-scale applications: A modeling approach for storage workloads},
  booktitle    = {2011 IEEE International Symposium on Workload Characterization (IISWC)},
  organization = {IEEE},
  pages        = {51--60},
  year         = {2011}
}
% Guo et al., energy- and network-aware workload management (IEEE TPDS 2014).
@article{b31,
  author   = {Guo, Y. and Gong, Y. and Fang, Y. and Khargonekar, P. P. and Geng, X.},
  title    = {Energy and Network Aware Workload Management for Sustainable Data Centers with Thermal Storage},
  journal  = {IEEE Transactions on Parallel and Distributed Systems},
  volume   = {25},
  number   = {8},
  pages    = {2030--2042},
  month    = aug,
  year     = {2014},
  doi      = {10.1109/TPDS.2013.278},
  issn     = {1045-9219},
  keywords = {computer centres;green computing;power aware computing;renewable energy sources;resource allocation;scheduling;stochastic programming;sustainable development;energy aware workload management;network aware workload management;sustainable data centers;thermal storage management;carbon footprint reduction;renewable energy sources;green energy;geographical load balancing;opportunistic scheduling;delay-tolerant workloads;green energy integration;brown energy usage;stochastic program;online control algorithm;Lyapunov optimization technique;stochastic cost minimization algorithm;SCMA;Renewable energy sources;Bandwidth;Cooling;Energy storage;Electricity;Green products;Optimization;Data center;energy management;thermal storage;load scheduling;Lyapunov optimization}
}

% Wikipedia article on hard disk drives. The acronym is braced; unbraced, sentence-casing styles print it as "Hdd".
@misc{b32,
  title        = {{HDD}},
  howpublished = {\url{https://en.wikipedia.org/wiki/Hard\_disk\_drive}},
  year         = {2018}
}

% Wikipedia article on solid-state drives. The acronym is braced; unbraced, sentence-casing styles print it as "Ssd".
@misc{b33,
  title        = {{SSD}},
  howpublished = {\url{https://en.wikipedia.org/wiki/Solid-state\_drive}},
  year         = {2018}
}
% Oceanbase web page.
@misc{b34,
  title        = {Oceanbase},
  howpublished = {\url{http://oceanbase.org.cn/?p=151}},
  year         = {2016}
}
% Stone, Gohara and Shi, OpenCL overview (Computing in Science and Engineering, 2010).
% The url value is on one line with no surrounding whitespace. The former eprint field held a
% PDF link, not an eprint identifier, and is dropped; the DOI identifies the article.
@article{b35,
  author  = {Stone, John E. and Gohara, David and Shi, Guochun},
  title   = {{OpenCL}: A Parallel Programming Standard for Heterogeneous Computing Systems},
  journal = {Computing in Science \& Engineering},
  volume  = {12},
  number  = {3},
  pages   = {66--73},
  year    = {2010},
  doi     = {10.1109/MCSE.2010.69},
  url     = {https://aip.scitation.org/doi/abs/10.1109/MCSE.2010.69}
}
% Xie et al., data placement in heterogeneous Hadoop clusters (IPDPSW 2010).
% Names are written as Last, First; the export had braced whole names, which BibTeX treats as a single surname.
% NOTE(review): family/given split of the spelled-out names follows the Western order used in the export -- confirm.
@inproceedings{b36,
  author    = {Xie, Jiong and Yin, Shu and Ruan, Xiaojun and Ding, Zhiyang and Tian, Yun and Majors, J. and Manzanares, A. and Qin, Xiao},
  title     = {Improving {MapReduce} performance through data placement in heterogeneous {Hadoop} clusters},
  booktitle = {2010 IEEE International Symposium on Parallel \& Distributed Processing, Workshops and {PhD} Forum ({IPDPSW})},
  pages     = {1--9},
  month     = apr,
  year      = {2010},
  doi       = {10.1109/IPDPSW.2010.5470880},
  keywords  = {data mining;distributed processing;indexing;information resources;pattern clustering;resource allocation;MapReduce;heterogeneous Hadoop cluster;distributed processing model;large scale data intensive application;data mining;Web indexing;open source implementation;data locality;virtualized data centers;balanced data processing load;data placement strategy;Peer to peer computing;Data processing;Large-scale systems;Data mining;Indexing;Open source software;Facebook;Programming profession;Computer science;Software engineering}
}
% Stuedi et al., Crail (IEEE Data Engineering Bulletin, 2017).
% The title no longer ends in a period; the bibliography style supplies the punctuation.
@article{b37,
  author  = {Stuedi, Patrick and Trivedi, Animesh and Pfefferle, Jonas and Stoica, Radu and Metzler, Bernard and Ioannou, Nikolas and Koltsidas, Ioannis},
  title   = {Crail: A High-Performance {I/O} Architecture for Distributed Data Processing},
  journal = {IEEE Data Engineering Bulletin},
  volume  = {40},
  number  = {1},
  pages   = {38--49},
  year    = {2017}
}
% Beutler and Brockmann, 1993 IGS Workshop proceedings.
% NOTE(review): volume 369 is unusual for a workshop proceedings -- confirm it is not a page count.
@inproceedings{b38,
  author       = {Beutler, Gerhard and Brockmann, E.},
  title        = {International {GPS} service for Geodynamics},
  booktitle    = {Proceedings of the 1993 IGS Workshop},
  volume       = {369},
  organization = {Druckerei der Universitaet Bern},
  year         = {1993}
}
% Berg et al., US patent on log and event analysis and management.
@misc{b39,
  author    = {Berg, Gal and Koschitzky, Haim and Saguy, Amir and Koschitzky, Omry},
  title     = {System and method for analysis and management of logs and events},
  note      = {US Patent 7,895,167},
  publisher = {Google Patents},
  month     = feb # "~22",
  year      = {2011}
}
% Apache Storm project site. The product name is braced so sentence-casing styles keep its capital.
@misc{b40,
  title        = {Apache {Storm}},
  howpublished = {\url{https://storm.apache.org}},
  year         = {2018}
}
% Dean and Ghemawat, MapReduce (Communications of the ACM, January 2008 issue).
@article{b41,
  author     = {Dean, Jeffrey and Ghemawat, Sanjay},
  title      = {{MapReduce}: Simplified Data Processing on Large Clusters},
  journal    = {Communications of the ACM},
  issue_date = {January 2008},
  volume     = {51},
  number     = {1},
  pages      = {107--113},
  numpages   = {7},
  month      = jan,
  year       = {2008},
  issn       = {0001-0782},
  publisher  = {ACM},
  address    = {New York, NY, USA}
}
% Davidson and Or, Spark shuffle optimization. The original journal field named a university
% department followed by "Tech. Rep", so the record is typed as a technical report.
% NOTE(review): add the report number if one exists.
@techreport{b42,
  author      = {Davidson, Aaron and Or, Andrew},
  title       = {Optimizing shuffle performance in {Spark}},
  institution = {University of California, Berkeley, Department of Electrical Engineering and Computer Sciences},
  year        = {2013}
}
% Mitzenmacher, power of two choices (IEEE TPDS 2001).
@article{b43,
  author   = {Mitzenmacher, M.},
  title    = {The power of two choices in randomized load balancing},
  journal  = {IEEE Transactions on Parallel and Distributed Systems},
  volume   = {12},
  number   = {10},
  pages    = {1094--1104},
  month    = oct,
  year     = {2001},
  keywords = {resource allocation;queueing theory;exponential distribution;stochastic processes;distributed algorithms;randomized load balancing;Poisson stream;first-in first-out protocol;service time;exponential distribution;supermarket model;deterministic model;finite systems;simulations;queuing theory;distributed systems;limiting systems;Load management;Queueing analysis;Protocols;Load modeling;Predictive models;System analysis and design;Computer applications;Resource management;H infinity control;Differential equations}
}

% Staelin and Garcia-Molina, clustering active disk data (1990).
% NOTE(review): typed as a technical report because the only "publisher" given was a university
% department -- confirm, and add the report number if known.
@techreport{b44,
  author      = {Staelin, Carl and Garcia-Molina, Hector},
  title       = {Clustering active disk data to improve disk performance},
  institution = {Princeton University, Department of Computer Science},
  year        = {1990}
}

% Shim, US patent application for an SSD storage device.
% The month uses the standard macro; the original value was the quoted digit 5.
@misc{b46,
  author    = {Shim, Shady},
  title     = {{SSD} storage device},
  note      = {US Patent App. 29/628,078},
  publisher = {Google Patents},
  month     = may,
  year      = {2019}
}
% Tsoukatos et al., US patent application on high-rpm hard disk drive testing.
% The month uses the unquoted macro; the quoted form was a literal string, printed verbatim as lower-case "may".
@misc{b47,
  author = {Tsoukatos, Antonia and Rausch, Tim and Erden, Mehmet Fatih and Parish, Benjamin W and Ramakrishna, Prasanna Manja and Tayefeh, Morovat Bryan and Hon, Sai Sian and Guo, Chengyi and Lim, Teck Khoon and Teo, Song Wee and others},
  title  = {High rpm hard disk drive testing},
  note   = {US Patent App. 16/058,819},
  month  = may,
  year   = {2019}
}
% Cobb and Huffman, NVM Express talk (Intel Developer Forum 2012).
% Product names and acronyms are restored and braced; the export had lower-cased them.
@inproceedings{b45,
  author       = {Cobb, Danny and Huffman, Amber},
  title        = {{NVM Express} and the {PCI Express} {SSD} Revolution},
  booktitle    = {Intel Developer Forum},
  organization = {Intel},
  year         = {2012}
}
% Phani Bhushan et al., data aware distributed storage on a Hadoop cluster (Springer, 2020).
@inproceedings{b48,
  author    = {Phani Bhushan, R. and Somayajulu, D. V. L. N. and Venkatraman, S. and Subramanyam, R. B. V.},
  editor    = {Satapathy, Suresh Chandra and Raju, K. Srujan and Shyamala, K. and Krishna, D. Rama and Favorskaya, Margarita N.},
  title     = {Data Aware Distributed Storage ({DAS}) for Performance Improvement Across a {Hadoop} Commodity Cluster},
  booktitle = {Advances in Decision Sciences, Image Processing, Security and Computer Vision},
  pages     = {350--357},
  year      = {2020},
  publisher = {Springer International Publishing},
  address   = {Cham},
  isbn      = {978-3-030-24322-7},
  abstract  = {Big Data is the order of the day and has found in-roads into many areas of working other than just the internet, which has been the breeding ground for this technology. The Remote Sensing domain has also seen growth in volumes and velocity of spatial data and thus the term Spatial Big Data has been coined to refer to this type of data. Processing the spatial data for applications such as urban mapping, object detection, change detection have undergone changes for the sake of computational efficiency from being single monolithic centralized processing to distributed processing and from single core CPUs to Multicore CPUs and further to GPUs and specific hardware in terms of architecture. The two major problems faced in this regard is the size of the data to be processed per unit of memory/time and the storage and retrieval of data for efficient processing. In this paper, we discuss a method of distributing data across a HDFS cluster, which aids in fast retrieval and faster processing per unit of available memory in the Image Processing domain. We evaluate our technique and compare the same with the traditional approach on a 4-node HDFS cluster. Significant improvement is found while performing edge detection on large spatial data, which has been tabulated in the results section.}
}
